home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Sprite 1984 - 1993
/
Sprite 1984 - 1993.iso
/
src
/
machserver
/
1.098
/
proc
/
procRecovery.c
< prev
next >
Wrap
C/C++ Source or Header
|
1990-10-09
|
11KB
|
394 lines
/*
* procRecovery.c
*
* Routines for process migration recovery.
*
* Copyright 1988, 1989, 1990 Regents of the University of California.
* All rights reserved.
* Permission to use, copy, modify, and distribute this
* software and its documentation for any purpose and without
* fee is hereby granted, provided that the above copyright
* notice appear in all copies. The University of California
* makes no representations about the suitability of this
* software for any purpose. It is provided "as is" without
* express or implied warranty.
*/
/*
 * RCS revision identifier for this file.  Only compiled when "lint" is
 * not defined.
 */
#ifndef lint
static char rcsid[] = "$Header: /sprite/src/kernel/proc/RCS/procRecovery.c,v 9.4 90/09/12 13:58:25 jhh Exp $ SPRITE (Berkeley)";
#endif /* not lint */
#include <sprite.h>
#include <proc.h>
#include <procInt.h>
#include <procMigrate.h>
#include <migrate.h>
#include <recov.h>
#include <sync.h>
#include <rpc.h>
#include <hash.h>
#include <stdlib.h>
#include <stdio.h>
/*
 * Forward declaration of the callback handed to the recovery module
 * (see ProcRecovInit and ProcMigAddDependency).
 */
static void HostChanged _ARGS_((int hostID, ClientData clientData));

/*
 * recovLock	  - lock named by LOCKPTR; presumably the one taken by
 *		    LOCK_MONITOR/UNLOCK_MONITOR, which are defined outside
 *		    this file.
 * recovCondition - broadcast by HostChanged and waited on by
 *		    Proc_WaitForHost.
 */
static Sync_Lock recovLock;
Sync_Condition recovCondition = {0};

/*
 * Parenthesized so that the expansion stays a single operand no matter
 * what expression the macro is used in.
 */
#define LOCKPTR (&recovLock)
/*
 * One record per process that depends on another host.  Records are
 * allocated in ProcMigAddDependency, stored as the value of a
 * dependHashTable entry, and freed by ProcMigRemoveDependency or
 * HostChanged.
 */
typedef struct DependInfo {
    Proc_PID processID;		/* the process ID on our host, and the
				 * key into the hash table */
    Proc_PID peerProcessID;	/* the process ID on the other host. */
    int hostID;			/* the other host for the process
				 * (home or remote); derived from
				 * peerProcessID with Proc_GetHostID. */
    int flags;			/* Bit mask; the only bit used in this file
				 * is DEPEND_UNREACHABLE. */
} DependInfo;
/*
 * Define constants for use with migration-recovery interaction.
 *
 *	DEPEND_UNREACHABLE - host was unable to receive death notice.
 *			     Set in DependInfo.flags by
 *			     ProcMigRemoveDependency when its caller
 *			     reports that the other host was not notified;
 *			     HostChanged later disposes of such entries.
 */
#define DEPEND_UNREACHABLE 1
/*
 * Values passed to HostChanged as its clientData argument.
 *	HOST_CRASHED - a host is down, or rebooted.  Supplied when
 *		       HostChanged is registered with Recov_CrashRegister.
 *	HOST_UP	     - host is reachable.  Supplied when HostChanged is
 *		       registered with Recov_RebootRegister.
 */
#define HOST_CRASHED 0
#define HOST_UP 1
/*
 * Hash table of dependency information.  Keys are local process IDs
 * (one-word keys, see ProcRecovInit) and values are DependInfo pointers.
 * The rest of the file always goes through the dependHashTable pointer.
 */
Hash_Table dependHashTableStruct;
Hash_Table *dependHashTable = &dependHashTableStruct;
/*
 * Define a structure for keeping track of dependencies while doing
 * a hash search -- we store the key in a list and do individual hash
 * lookups once the search is over.  HostChanged allocates one of these
 * for each entry it intends to delete and frees it after the lookup.
 */
typedef struct {
    List_Links links;		/* Pointers within list. */
    Proc_PID processID;		/* Key. */
} DependChain;

/*
 * When defined, the code below uses Hash_Find, Hash_LookOnly and
 * Hash_Delete; otherwise it uses Hash_CreateEntry, Hash_FindEntry and
 * Hash_DeleteEntry.
 */
#define KERNEL_HASH
/*
 *----------------------------------------------------------------------
 *
 * ProcRecovInit --
 *
 *	Initialize the data structures for process migration recovery.
 *
 * Results:
 *	None.
 *
 * Side effects:
 *	The lock and the dependency hash table are initialized, and the
 *	recovery system is told to call HostChanged (with HOST_CRASHED)
 *	when it determines that a host has crashed.
 *
 *----------------------------------------------------------------------
 */

void
ProcRecovInit()
{
    /*
     * Set up everything HostChanged touches before handing HostChanged
     * to the recovery module: the callback takes the lock and walks the
     * hash table, so neither may be uninitialized once it can be called.
     */
    Sync_LockInitDynamic(&recovLock, "Proc:recovLock");
    Hash_Init(dependHashTable, 0, HASH_ONE_WORD_KEYS);
    Recov_CrashRegister(HostChanged, (ClientData) HOST_CRASHED);
}
/*
 *----------------------------------------------------------------------
 *
 * HostChanged --
 *
 *	Callback for the recovery module.  It is registered with
 *	HOST_CRASHED as client data for crashes (ProcRecovInit) and with
 *	HOST_UP for reboots of a particular host (ProcMigAddDependency).
 *
 *	For every dependency on the given host:
 *	  - host crashed, process not yet flagged DEPEND_UNREACHABLE:
 *	    the process still exists here, so Proc_DestroyMigratedProc
 *	    is handed to Proc_CallFunc for it.  The table entry is left
 *	    alone by this routine.
 *	  - host crashed, process already flagged: only the table entry
 *	    is left, and it is removed.
 *	  - host is up, process flagged: ProcMigKillRemoteCopy is handed
 *	    to Proc_CallFunc for the peer process, and the table entry
 *	    is removed.
 *	  - host is up, process not flagged: nothing is done.
 *
 * Results:
 *	None.
 *
 * Side effects:
 *	Everyone waiting on recovCondition is woken up.  Functions may
 *	be handed to Proc_CallFunc as described above.  Dependency
 *	entries may be deleted from the hash table and freed, and their
 *	reboot callbacks unregistered.
 *
 *----------------------------------------------------------------------
 */

static ENTRY void
HostChanged(hostID, clientData)
    int hostID;			/* Host whose state changed. */
    ClientData clientData;	/* HOST_CRASHED or HOST_UP. */
{
    Hash_Search hashSearch;
    register Hash_Entry *hashEntryPtr;
    DependInfo *dependPtr;
    List_Links dependList;
    DependChain *chainPtr;
    Boolean crashed;
    Boolean found;

    LOCK_MONITOR;

    crashed = ((int) clientData) == HOST_CRASHED;
    Sync_Broadcast(&recovCondition);

    /*
     * First pass: walk the whole table.  Entries that must go are only
     * remembered (by key) on dependList; they are deleted after the
     * search is over.
     */
    Hash_StartSearch(&hashSearch);
    List_Init(&dependList);
    for (hashEntryPtr = Hash_Next(dependHashTable, &hashSearch);
	 hashEntryPtr != (Hash_Entry *) NIL;
	 hashEntryPtr = Hash_Next(dependHashTable, &hashSearch)) {
	dependPtr = (DependInfo *) Hash_GetValue(hashEntryPtr);
	if (dependPtr->hostID == hostID) {
	    if (crashed && !(dependPtr->flags & DEPEND_UNREACHABLE)) {
		/*
		 * This process still exists on our host.
		 */
		Proc_CallFunc(Proc_DestroyMigratedProc,
			      (ClientData) dependPtr->processID, 0);
	    } else if (crashed || (dependPtr->flags & DEPEND_UNREACHABLE)) {
		/*
		 * Either the host crashed but we already killed the
		 * process and just want to get rid of the hash entry,
		 * or it has come back and we want to notify it about
		 * a dead process.
		 */
		chainPtr = (DependChain *) malloc(sizeof(DependChain));
		chainPtr->processID = dependPtr->processID;
		List_InitElement(&chainPtr->links);
		List_Insert(&chainPtr->links, LIST_ATREAR(&dependList));
		if (!crashed) {
		    Proc_CallFunc(ProcMigKillRemoteCopy,
				  (ClientData) dependPtr->peerProcessID, 0);
		}
	    }
	}
    }

    /*
     * Now clean up the list of dependencies to do Hash_Deletes.
     */
    while (!List_IsEmpty(&dependList)) {
	chainPtr = (DependChain *) List_First(&dependList);
	List_Remove((List_Links *) chainPtr);
#ifdef KERNEL_HASH
	hashEntryPtr = Hash_LookOnly(dependHashTable,
				     (Address) chainPtr->processID);
	found = (hashEntryPtr != (Hash_Entry *) NIL);
#else /* KERNEL_HASH */
	hashEntryPtr = Hash_FindEntry(dependHashTable,
				      (Address) chainPtr->processID);
	found = (hashEntryPtr != (Hash_Entry *) NULL);
#endif /* KERNEL_HASH */
	if (found) {
	    dependPtr = (DependInfo *) Hash_GetValue(hashEntryPtr);
#ifdef KERNEL_HASH
	    Hash_Delete(dependHashTable, hashEntryPtr);
#else /* KERNEL_HASH */
	    Hash_DeleteEntry(dependHashTable, hashEntryPtr);
#endif /* KERNEL_HASH */
	    /*
	     * The reboot callback for this entry was registered with
	     * HOST_UP (see ProcMigAddDependency), so it is unregistered
	     * with HOST_UP as well, exactly as ProcMigRemoveDependency
	     * does.  Passing our own clientData would hand over
	     * HOST_CRASHED when we are called for a crash.
	     * NOTE(review): presumably the recovery module matches
	     * registrations on the client data -- confirm.
	     */
	    Recov_RebootUnRegister(dependPtr->hostID, HostChanged,
				   (ClientData) HOST_UP);
	    free((Address) dependPtr);
	}
	free((char *) chainPtr);
    }

    UNLOCK_MONITOR;
}
/*
 *----------------------------------------------------------------------
 *
 * ProcMigAddDependency --
 *
 *	Note a dependency of a process on a remote machine.  If the
 *	process is already in the table nothing is changed; depending on
 *	proc_MigDebugLevel the duplicate is ignored silently (level 0),
 *	reported with a warning (levels 1-4), or causes a panic (above 4).
 *
 * Results:
 *	None.
 *
 * Side effects:
 *	For a new process, memory is allocated for a structure that is
 *	put in the hash table, and HostChanged is registered (with
 *	HOST_UP) for reboots of the peer's host.
 *
 *----------------------------------------------------------------------
 */

ENTRY void
ProcMigAddDependency(processID, peerProcessID)
    Proc_PID processID;		/* process that depends on the host */
    Proc_PID peerProcessID;	/* process on the other host */
{
    DependInfo *dependPtr;
    Hash_Entry *hashEntryPtr;
    Boolean new;

    LOCK_MONITOR;

#ifdef KERNEL_HASH
    hashEntryPtr = Hash_Find(dependHashTable, (Address) processID);
    new = (hashEntryPtr->value == (Address) NIL);
#else /* KERNEL_HASH */
    hashEntryPtr = Hash_CreateEntry(dependHashTable, (Address) processID, &new);
#endif /* KERNEL_HASH */
    if (!new) {
	if (proc_MigDebugLevel > 0) {
	    if (proc_MigDebugLevel > 4) {
		panic("ProcMigAddDependency: process %x already registered.\n",
		      processID);
	    } else {
		printf(
		    "%s ProcMigAddDependency: process %x already registered.\n",
		    "Warning:", processID);
	    }
	}
	UNLOCK_MONITOR;
	return;
    }

    /*
     * Allocate the record only now that we know it will be stored;
     * allocating it before the duplicate check would leak it on the
     * early return above.
     */
    dependPtr = (DependInfo *) malloc(sizeof (DependInfo));
    dependPtr->processID = processID;
    dependPtr->peerProcessID = peerProcessID;
    dependPtr->hostID = Proc_GetHostID(peerProcessID);
    dependPtr->flags = 0;

    Hash_SetValue(hashEntryPtr, (ClientData) dependPtr);
    Recov_RebootRegister(dependPtr->hostID, HostChanged, (ClientData) HOST_UP);

    UNLOCK_MONITOR;
}
/*
*----------------------------------------------------------------------
*
* ProcMigRemoveDependency --
*
* Remove a process from the table of dependencies on remote machines.
* If the other host wasn't notified, defer the removal until the other
* host is known to be down or rebooted, and notify the other host
* of the death of the process if it should come back. After this
* routine is called, the routines in this file are responsible for
* eventually getting rid of the dependency (e.g., the caller
* won't call again).
*
* Results:
* None.
*
* Side effects:
* The corresponding entry for the process is removed from the
* hash table and freed, or flagged for future removal.
*
*----------------------------------------------------------------------
*/
ENTRY void
ProcMigRemoveDependency(processID, notified)
Proc_PID processID; /* process to remove */
Boolean notified; /* Whether other host was notified of death. */
{
DependInfo *dependPtr;
Hash_Entry *hashEntryPtr;
LOCK_MONITOR;
#ifdef KERNEL_HASH
hashEntryPtr = Hash_LookOnly(dependHashTable, (Address) processID);
if (hashEntryPtr == (Hash_Entry *) NIL) {
#else KERNEL_HASH
hashEntryPtr = Hash_FindEntry(dependHashTable, (Address) processID);
if (hashEntryPtr == (Hash_Entry *) NULL) {
#endif /* KERNEL_HASH */
#ifdef notdef
/*
* (We're not going to care if it's already been removed because we
* are called to get rid of processIDs that may already have been
* removed, and we're just making doubly sure.)
*/
if (proc_MigDebugLevel > 0) {
if (proc_MigDebugLevel > 4) {
panic("ProcMigRemoveDependency: process %x not registered.\n",
processID);
} else {
printf(
"%s ProcMigRemoveDependency: process %x not registered.\n",
"Warning:", processID);
}
}
#endif /* notdef */
UNLOCK_MONITOR;
return;
}
dependPtr = (DependInfo *) Hash_GetValue(hashEntryPtr);
if (!notified) {
dependPtr->flags |= DEPEND_UNREACHABLE;
UNLOCK_MONITOR;
return;
}
#ifdef KERNEL_HASH
Hash_Delete(dependHashTable, hashEntryPtr);
#else KERNEL_HASH
Hash_DeleteEntry(dependHashTable, hashEntryPtr);
#endif /* KERNEL_HASH */
Recov_RebootUnRegister(dependPtr->hostID, HostChanged,
(ClientData) HOST_UP);
free ((Address) dependPtr);
UNLOCK_MONITOR;
}
/*
 *----------------------------------------------------------------------
 *
 * Proc_WaitForHost --
 *
 *	Block for as long as the recovery module reports the host as
 *	RECOV_HOST_DYING, i.e. until it has fully crashed, come back, or
 *	rebooted, then report which of those happened.  The wait is on
 *	recovCondition, which HostChanged broadcasts.
 *
 * Results:
 *	SUCCESS			- the host is useable again.
 *	NET_UNREACHABLE_HOST	- the host crashed (its state is
 *				  RECOV_HOST_DEAD or RECOV_HOST_BOOTING).
 *	GEN_ABORTED_BY_SIGNAL	- a signal was received while waiting.
 *
 * Side effects:
 *	The calling process blocks while the host is dying.
 *
 *----------------------------------------------------------------------
 */

ENTRY ReturnStatus
Proc_WaitForHost(hostID)
    int hostID;			/* Host to wait for. */
{
    ReturnStatus result = SUCCESS;
    int state;

    LOCK_MONITOR;

    /*
     * Re-read the host's state after every wakeup; leave the loop early
     * if the wait was cut short by a signal.
     */
    for (state = Recov_GetClientState(hostID);
	 state & RECOV_HOST_DYING;
	 state = Recov_GetClientState(hostID)) {
	if (Sync_Wait(&recovCondition, TRUE)) {
	    result = GEN_ABORTED_BY_SIGNAL;
	    break;
	}
    }

    /*
     * Host has crashed and is either out of touch or rebooting now:
     * report failure so that migrated clients get killed.  In every
     * other state (including RECOV_HOST_ALIVE) it is business as usual
     * and the result stays SUCCESS.
     */
    if (result == SUCCESS
	    && (state & (RECOV_HOST_DEAD | RECOV_HOST_BOOTING))) {
	result = NET_UNREACHABLE_HOST;
    }

    UNLOCK_MONITOR;
    return(result);
}